library(RMongo)
## Loading required package: rJava
  library(ggplot2)
  library(plyr)
  library(reshape2)
  library(knitr)

Load data

  # Open a connection to the "classification_cross_validation" database on the
  # MongoDB server at localhost:27017.
  cross_validation <- mongoDbConnect(
    "classification_cross_validation",
    "localhost",
    27017
  )

  # Query the "performance" collection with an empty filter ('{}'); the two
  # trailing zeros are the skip and limit arguments.
  # NOTE(review): limit = 0 presumably means "no limit" -- confirm with RMongo.
  performance <- dbGetQuery(
    cross_validation,
    "performance",
    '{}',
    0,
    0
  )

Keep only one criterion of each complementary pair, because the data are symmetric with respect to AUC

#   juged.data <- pr.complete[which(pr.complete$criteria %in% c('juged_bad', 'juged_good')), ]
#   interact_length.data <- pr.complete[which(pr.complete$criteria %in% c('short_interactions', 'long_interactions')), ]
#   real_simulated.data <- pr.complete[which(pr.complete$criteria %in% c('real', 'simulated')), ]
#   success.data <- pr.complete[which(pr.complete$criteria %in% c('task_failed', 'task_successful')), ]
#   word_accuracy.data <- pr.complete[which(pr.complete$criteria %in% c('word_accuracy_100', 'word_accuracy_60')), ]
#   simulation_quality.data <- pr.complete[which(pr.complete$criteria %in% c('simulation_quality_best', 'simulation_quality_worst')), ]
#   real_vs_worst_sim.data <- pr.complete[which(pr.complete$criteria %in% c('real_vs_simulated_worst', 'simulated_worst_vs_real')), ]
  
  # Lookup table: criterion key (as stored in performance$criteria) -> label
  # shown on the plots. Only one criterion of each complementary pair is
  # listed. The order of the entries defines the row order of `cutted`.
  criteria_labels <- c(
    juged_bad               = 'user jugedment',
    short_interactions      = 'dialogue length',
    real                    = 'real vs simulated',
    task_failed             = 'task success',
    word_accuracy_100       = 'word accuracy',
    simulation_quality_best = 'real vs. simulated (good)',
    real_vs_simulated_worst = 'real vs. simulated (bad)'
  )

  # Select the rows belonging to the listed criteria in a single pass.
  # `%in%` never yields NA, so rows with a missing criterion are dropped
  # instead of turning into all-NA rows (which `==` inside `[` would produce).
  # as.character() keeps the lookup name-based even if `criteria` is a factor.
  criteria_key <- as.character(performance$criteria)
  keep <- criteria_key %in% names(criteria_labels)
  cutted <- performance[keep, ]

  # Group the rows by criterion in the order of the lookup table; ties keep
  # their original relative order.
  cutted <- cutted[order(match(criteria_key[keep], names(criteria_labels))), ]

  # Set names for criteria
  # One vectorised lookup: a criterion that does not occur in the data simply
  # contributes no rows, instead of raising "replacement has 1 row, data has 0".
  cutted$criteria_name <- unname(criteria_labels[as.character(cutted$criteria)])

Overview

Histogram of AUC for all 2688 scenarios.

Mean: 0.6595527

Median: 0.6086348

Distribution of AUC values per Distance Measure

AUC in dependency of measures and criteria

  # Scatter plot of AUC per criterion; the point colour encodes the classifier.
  # The x-axis labels are rotated by 90 degrees.
  ggplot(
    data = cutted,
    mapping = aes(x = criteria_name, y = auc, color = classifier_name)
  ) +
    geom_point() +
    theme(axis.text.x = element_text(angle = 90))

Rank Order

AUC in dependency of n-gram size and criteria

  # Restrict to the 'rank order' classifier, then plot AUC per criterion with
  # the point colour encoding the n-gram size (treated as a discrete factor).
  rank_order <- cutted[which(cutted$classifier_name == 'rank order'), ]
  ggplot(
    data = rank_order,
    mapping = aes(x = criteria_name, y = auc, color = factor(n_gram_size))
  ) +
    geom_point() +
    theme(axis.text.x = element_text(angle = 90))

AUC in dependency of frequency threshold and criteria

  # Restrict to the 'rank order' classifier, then plot AUC per criterion with
  # the point colour encoding the frequency threshold (as a discrete factor).
  rank_order <- cutted[which(cutted$classifier_name == 'rank order'), ]
  ggplot(
    data = rank_order,
    mapping = aes(x = criteria_name, y = auc, color = factor(frequency_threshold))
  ) +
    geom_point() +
    theme(axis.text.x = element_text(angle = 90))

AUC in dependency of smoothing value and criteria

  # Restrict to the 'rank order' classifier, then plot AUC per criterion with
  # the point colour encoding the smoothing value (as a discrete factor).
  rank_order <- cutted[which(cutted$classifier_name == 'rank order'), ]
  ggplot(
    data = rank_order,
    mapping = aes(x = criteria_name, y = auc, color = factor(smoothing_value))
  ) +
    geom_point() +
    theme(axis.text.x = element_text(angle = 90))

Detailed view on selected scenarios

Histogram for t=1, n=8, word_accuracy_60 and s = 0.25

Get scenarios with AUC = 1.

  # Keep the rows of `cutted` whose AUC equals 1. The comparison is exact, and
  # which() drops rows where `auc` is NA.
  is_perfect <- cutted$auc == 1
  p <- cutted[which(is_perfect), ]
    # Alternative kept for reference: query the database directly.
    #dbGetQuery(cross_validation, "performance", "{auc: 1}", 0, 0)
## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bindot: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.